import requests
from lxml import etree
# Install dependencies: pip install requests lxml -i https://pypi.tuna.tsinghua.edu.cn/simple

# Page to scrape: the ifeng.com home page.
url = "https://www.ifeng.com/"

# Request headers sent with the page fetch, imitating a desktop Chrome browser.
headers = {
    "Host": "www.ifeng.com",
    # A browser User-Agent must begin with the "Mozilla/" product token; the
    # previous value started at "5.0 (...", i.e. the prefix had been lost.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    # NOTE(review): this is a hard-coded cookie string that looks like it was
    # captured from a browser session; presumably it is not required for the
    # public home page and will go stale -- confirm, then consider removing it.
    "Cookie": "prov=cn0551; city=0553; weather_city=ah_wh; region_ip=218.22.75.x; region_ver=1.2; userid=1678168879601_s5dbff8952; __bid_n=186baa9013ef212fe34207; adb_isBlock=0; wxIsclose=false; Hm_lvt_854ddd4a39be7c994420d51fb2e3ded7=1678235800,1678345105,1678346615,1678946904; FPTOKEN=CTwpGIdZoPmI/qTe3QuVe64KSZuiNu5suO88FmwA0Gt+zJc7LkO18M9ZtuT8ZrKk8zZyo3sO3w/ayZ0Cek9bazJl72J7iz2O+CGvEPAfB+j76x31JMxKoizi4boDAANyJCNmFLGEApoT1sczL/7usiiJF+Tf7/xDHAqqC7yVQ9rafnIlspkKzJN1FGYDgGwd2KvkTQISvBUbowySn2uZeyKkDYnWL/G6AySmuXHwnrh+2V6BiImaQRF4Vvw1CchXBPe9gGWVvCfy6xnkPxjRlqMH3fxggsmrJ/q5wIv04125BmChoQoo9oHOJt8aVma0QDoweV+ZOwV043kw8VwH1nIrUToeEAR0baTI6CPJkaJr2j99agHytwrWqXM4qEdIPvDfsiW25MJ2C6u5sN2XpQ==|oFfid1GirTvU0yY9odbxV3MwPP6MRkRwu8nxfoZQ/j0=|10|6772990ace500711445ec67de2b96a02; Hm_lpvt_854ddd4a39be7c994420d51fb2e3ded7=1678947999; ifengWindowCookieName_919=2",
}

# Fetch the page (an HTML document) and extract data from it with XPath.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() turns an HTTP error status into an exception instead
# of letting an error page be parsed as if it were the real content.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()

# When the server declares no charset, requests falls back to ISO-8859-1 for
# text responses, which garbles non-Latin text; use the encoding detected from
# the body in that case.
if response.encoding is None or response.encoding.lower() == "iso-8859-1":
    response.encoding = response.apparent_encoding
htmlText = response.text

# XPath selecting the headline <h3> elements by their exact class attribute.
# NOTE(review): the class-name suffixes look build-generated; presumably they
# change when the site is redeployed -- verify if this starts matching nothing.
xpathString = '//h3[@class="list_title-nOvTJ00k big-3QPOjsEI"]'

# Build the root element object of the parsed document.
htmlRoot = etree.HTML(htmlText)

# Start extracting: evaluate the XPath expression from the root to find the
# headline elements. The result of this element query is a list.
h3Elements = htmlRoot.xpath(xpathString)
print(len(h3Elements))

# For each headline <h3>, print the text and href of its single child <a>.
for h3 in h3Elements:
    # Direct <a> children of the current <h3>.
    aList = h3.xpath("./a")
    print(aList)  # debug output: the matched anchor elements
    # Only headlines with exactly one link are handled.
    if len(aList) != 1:
        continue

    aElement = aList[0]
    print(aElement)  # debug output: the anchor element itself
    aText = aElement.xpath("./text()")
    # Prefer the first direct text node; when the title is wrapped in a child
    # element there is none, so fall back to all nested text instead of
    # failing with IndexError.
    title = aText[0] if aText else "".join(aElement.itertext()).strip()
    # .get() yields "" for an anchor without href rather than raising.
    href = aElement.get("href", "")
    if not title and not href:
        # Nothing useful to report for this anchor.
        continue
    print(title, "-", href)




